Load the dataset annotations and the prepared FPKM values
# Sample annotation table: four tab-separated character fields per record
# (setname, seqBatch, species, tissue), read with scan() and turned into a
# data frame with one column per field.
datasets <- as.data.frame(
  scan(
    file = "Stanford_datasets.txt",
    what = list(setname = "", seqBatch = "", species = "", tissue = ""),
    sep = "\t"
  )
)
# FPKM values as a plain matrix; the file has no header row.
# NOTE(review): presumably one column per row of `datasets` — confirm.
fpkmMat <- as.matrix(
  read.table("Stanford_datasets_fpkmMat.txt", header = FALSE, sep = "\t")
)
Apply PCA method to uncorrected data
# prcomp() treats rows as observations, so transpose the matrix before PCA.
# NOTE(review): logTransformed_fpkmMat is not defined in the visible part of
# this document; it is assumed to be created upstream — confirm.
transposeLogTransformed_fpkmMat <- t(logTransformed_fpkmMat)
# Keep only columns with non-zero variance: prcomp() cannot rescale a constant
# column to unit variance and stops with an error when scaling is requested.
# drop = FALSE keeps the input a matrix even if a single column survives.
# `scale.` is spelled out in full instead of relying on partial matching.
pca_proc <- prcomp(
  transposeLogTransformed_fpkmMat[
    ,
    apply(transposeLogTransformed_fpkmMat, 2, var, na.rm = TRUE) != 0,
    drop = FALSE
  ],
  center = TRUE,
  scale. = TRUE
)
Check PCA statistics
# Print, for every principal component, its standard deviation, proportion of
# variance and cumulative proportion of variance.
summary(pca_proc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 54.2999 43.5061 39.3992 35.81511 31.77971 25.86337
## Proportion of Variance 0.2012 0.1292 0.1059 0.08755 0.06893 0.04565
## Cumulative Proportion 0.2012 0.3304 0.4364 0.52391 0.59284 0.63849
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 24.77243 22.33937 21.69784 21.1730 19.82838 18.28041
## Proportion of Variance 0.04188 0.03406 0.03213 0.0306 0.02683 0.02281
## Cumulative Proportion 0.68037 0.71443 0.74656 0.7772 0.80399 0.82680
## PC13 PC14 PC15 PC16 PC17 PC18
## Standard deviation 17.41969 17.00162 16.05240 15.65210 15.16221 14.56541
## Proportion of Variance 0.02071 0.01973 0.01759 0.01672 0.01569 0.01448
## Cumulative Proportion 0.84751 0.86724 0.88483 0.90155 0.91724 0.93172
## PC19 PC20 PC21 PC22 PC23 PC24
## Standard deviation 13.08324 12.96681 12.29599 12.07214 11.38150 11.18347
## Proportion of Variance 0.01168 0.01148 0.01032 0.00995 0.00884 0.00854
## Cumulative Proportion 0.94340 0.95487 0.96519 0.97514 0.98398 0.99252
## PC25 PC26
## Standard deviation 10.47129 5.633e-14
## Proportion of Variance 0.00748 0.000e+00
## Cumulative Proportion 1.00000 1.000e+00
Transfer PCA data to plots
# Start the plotting table from the sample annotations needed for labelling.
plotData <- datasets[, c("setname", "species", "tissue")]
# Append the scores of the first three principal components, one column each.
plotData[["PC1"]] <- pca_proc$x[, 1]
plotData[["PC2"]] <- pca_proc$x[, 2]
plotData[["PC3"]] <- pca_proc$x[, 3]
Plot the first and the second principal components

Plot the first and the second principal components with centroids
# Plot the samples on PC1 vs PC2 of the PCA computed above, coloured by species.
# The existing PCA object is used directly: re-running prcomp() on the score
# matrix with its first column removed would discard PC1, so the figure would
# show PC2 vs PC3 rather than the first two components.
# The variable name is kept in case later code refers to it.
plotData_pca <- pca_proc
fviz_pca_ind(plotData_pca,
  axes = c(1, 2), # first and second principal components
  geom.ind = "point", # show points only (but not "text")
  col.ind = plotData$species, # color by groups
  addEllipses = TRUE, # concentration ellipses
  legend.title = "Species",
  title = "PCA before correction" # `title` sets the plot title; `labs` does not
)

Plot the first, the second and the third principal components
Test the significance of the correlations between the PC values of matched human and mouse tissues
# Pearson correlation test of PC1 scores: samples 1-13 against samples 14-26.
# NOTE(review): assumes the two halves are the two species listed in the same
# tissue order, so element i of each vector forms a matched pair — confirm
# against the row order of `datasets`.
cor.test(plotData$PC1[1:13],plotData$PC1[14:26],method="pearson")
##
## Pearson's product-moment correlation
##
## data: plotData$PC1[1:13] and plotData$PC1[14:26]
## t = 1.4318, df = 11, p-value = 0.18
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1978558 0.7775285
## sample estimates:
## cor
## 0.3963364
# Pearson correlation test of PC2 scores: samples 1-13 against samples 14-26.
# NOTE(review): assumes the two halves are the two species listed in the same
# tissue order, so element i of each vector forms a matched pair — confirm
# against the row order of `datasets`.
cor.test(plotData$PC2[1:13],plotData$PC2[14:26],method="pearson")
##
## Pearson's product-moment correlation
##
## data: plotData$PC2[1:13] and plotData$PC2[14:26]
## t = 4.6926, df = 11, p-value = 0.000658
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4829263 0.9432117
## sample estimates:
## cor
## 0.8166208
# Pearson correlation test of PC3 scores: samples 1-13 against samples 14-26.
# NOTE(review): assumes the two halves are the two species listed in the same
# tissue order, so element i of each vector forms a matched pair — confirm
# against the row order of `datasets`.
cor.test(plotData$PC3[1:13],plotData$PC3[14:26],method="pearson")
##
## Pearson's product-moment correlation
##
## data: plotData$PC3[1:13] and plotData$PC3[14:26]
## t = 0.8382, df = 11, p-value = 0.4198
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3537165 0.7013257
## sample estimates:
## cor
## 0.2450217